ADD_DEPENDENCIES(preload-seccomp syscall-names-h)
endif()
+IF(SECCOMP_SUPPORT) # the OCI seccomp parser is only added to the ujail sources when seccomp support is enabled
+ SET(SOURCES_OCI_SECCOMP jail/seccomp-oci.c)
+ENDIF()
+
IF(JAIL_SUPPORT)
-ADD_EXECUTABLE(ujail jail/jail.c jail/elf.c jail/fs.c jail/capabilities.c)
+ADD_EXECUTABLE(ujail jail/jail.c jail/elf.c jail/fs.c jail/capabilities.c ${SOURCES_OCI_SECCOMP})
TARGET_LINK_LIBRARIES(ujail ${ubox} ${ubus} ${blobmsg_json})
INSTALL(TARGETS ujail
RUNTIME DESTINATION ${CMAKE_INSTALL_SBINDIR}
/*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 2.1
#define _GNU_SOURCE 1
#include <syslog.h>
#include <sys/prctl.h>
+#include <sys/capability.h>
#include <libubox/blobmsg.h>
#include <libubox/blobmsg_json.h>
#include "../capabilities-names.h"
#include "capabilities.h"
+#define JAIL_CAP_ERROR (1LLU << (CAP_LAST_CAP+1)) /* error sentinel of parseOCIcap(): a bit above every valid capability number */
+#define JAIL_CAP_ALL (0xffffffffffffffffLLU) /* all-ones mask; used for "capability set absent" and as the drop-everything default */
+
static int find_capabilities(const char *name)
{
int i;
for (i = 0; i <= CAP_LAST_CAP; i++)
- if (capabilities_names[i] && !strcmp(capabilities_names[i], name))
+ if (capabilities_names[i] && !strcasecmp(capabilities_names[i], name)) /* case-insensitive match; NULL table entries are skipped; the index found is the capability number, -1 is returned when nothing matches */
return i;
return -1;
}
+enum { /* indices into the tb[] array that blobmsg_parse() fills in
+	* parseOCIcapabilities(); one entry per capability set */
+ OCI_CAPABILITIES_BOUNDING,
+ OCI_CAPABILITIES_EFFECTIVE,
+ OCI_CAPABILITIES_INHERITABLE,
+ OCI_CAPABILITIES_PERMITTED,
+ OCI_CAPABILITIES_AMBIENT,
+ __OCI_CAPABILITIES_MAX /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_capabilities_policy[] = { /* JSON key and expected blobmsg type of each capability set */
+ [OCI_CAPABILITIES_BOUNDING] = { "bounding", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_EFFECTIVE] = { "effective", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_INHERITABLE] = { "inheritable", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_PERMITTED] = { "permitted", BLOBMSG_TYPE_ARRAY },
+ [OCI_CAPABILITIES_AMBIENT] = { "ambient", BLOBMSG_TYPE_ARRAY },
+};
+
+/*
+ * Convert one array of capability names into a bitmask (bit N set for
+ * capability number N).
+ *
+ * Returns JAIL_CAP_ALL when the array is absent and JAIL_CAP_ERROR when a
+ * name cannot be resolved by find_capabilities().
+ */
+static uint64_t parseOCIcap(struct blob_attr *msg)
+{
+	struct blob_attr *entry;
+	uint64_t mask = 0;
+	int left;
+
+	/* every capability set is optional; absence is encoded as all ones */
+	if (msg == NULL)
+		return JAIL_CAP_ALL;
+
+	blobmsg_for_each_attr(entry, msg, left) {
+		int idx = find_capabilities(blobmsg_get_string(entry));
+
+		if (idx < 0)
+			return JAIL_CAP_ERROR;
+
+		mask |= (1LLU << idx);
+	}
+
+	return mask;
+}
+
+/*
+ * Parse the OCI "capabilities" table in msg into *capset.
+ *
+ * The sets are processed in the order bounding, effective, inheritable,
+ * permitted, ambient. On the first set containing an unknown capability name
+ * EINVAL is returned; sets processed before it have already been stored.
+ * On success capset->apply is set to 1 and 0 is returned.
+ */
+int parseOCIcapabilities(struct jail_capset *capset, struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_CAPABILITIES_MAX];
+	/* destination field for each policy index */
+	uint64_t *const target[__OCI_CAPABILITIES_MAX] = {
+		[OCI_CAPABILITIES_BOUNDING] = &capset->bounding,
+		[OCI_CAPABILITIES_EFFECTIVE] = &capset->effective,
+		[OCI_CAPABILITIES_INHERITABLE] = &capset->inheritable,
+		[OCI_CAPABILITIES_PERMITTED] = &capset->permitted,
+		[OCI_CAPABILITIES_AMBIENT] = &capset->ambient,
+	};
+	int idx;
+
+	blobmsg_parse(oci_capabilities_policy, __OCI_CAPABILITIES_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	for (idx = 0; idx < __OCI_CAPABILITIES_MAX; idx++) {
+		uint64_t mask = parseOCIcap(tb[idx]);
+
+		if (mask == JAIL_CAP_ERROR)
+			return EINVAL;
+
+		*target[idx] = mask;
+	}
+
+	capset->apply = 1;
+
+	return 0;
+}
+
+
+/*
+ * Apply a capability set produced by parseOCIcapabilities() to the calling
+ * process: first the bounding set, then effective/permitted/inheritable via
+ * capset(), finally the ambient set.
+ *
+ * A set whose mask equals JAIL_CAP_ALL is left untouched. Nothing is done
+ * (and 0 is returned) when ocicapset.apply is not set.
+ *
+ * Returns 0 on success, the errno of a failing prctl(), or -1 when
+ * capget()/capset() fail.
+ */
+int applyOCIcapabilities(struct jail_capset ocicapset)
+{
+	struct __user_cap_header_struct uh = {};
+	/*
+	 * With _LINUX_CAPABILITY_VERSION_3 the kernel reads and writes two
+	 * 32-bit words per set, so an array of two data structs is required.
+	 */
+	struct __user_cap_data_struct ud[_LINUX_CAPABILITY_U32S_3];
+	int cap;
+	int is_set;
+	int err;
+
+	if (!ocicapset.apply)
+		return 0;
+
+	/* drop from bounding set */
+	if (ocicapset.bounding != JAIL_CAP_ALL) {
+		for (cap = 0; cap <= CAP_LAST_CAP; cap++) {
+			if (!prctl(PR_CAPBSET_READ, cap, 0, 0, 0)) {
+				/* can't raise */
+				if (ocicapset.bounding & (1LLU << cap))
+					ERROR("capability %s (%d) is not in bounding set\n", capabilities_names[cap], cap);
+
+				continue;
+			}
+			if ( (ocicapset.bounding & (1LLU << cap)) == 0) {
+				DEBUG("dropping capability %s (%d) from bounding set\n", capabilities_names[cap], cap);
+				if (prctl(PR_CAPBSET_DROP, cap, 0, 0, 0)) {
+					/* keep errno safe from whatever the logger does */
+					err = errno;
+					ERROR("prctl(PR_CAPBSET_DROP, %d) failed: %m\n", cap);
+					return err;
+				}
+			} else {
+				DEBUG("keeping capability %s (%d)\n", capabilities_names[cap], cap);
+			}
+		}
+	}
+
+	/* set effective, permitted and inheritable */
+	uh.version = _LINUX_CAPABILITY_VERSION_3;
+	uh.pid = getpid();
+
+	if (capget(&uh, ud)) {
+		ERROR("capget() failed\n");
+		return -1;
+	}
+
+	DEBUG("old capabilities: Pe=%08x%08x Pp=%08x%08x Pi=%08x%08x\n",
+	      ud[1].effective, ud[0].effective,
+	      ud[1].permitted, ud[0].permitted,
+	      ud[1].inheritable, ud[0].inheritable);
+
+	/* ud[0] carries capabilities 0-31, ud[1] capabilities 32-63 */
+	if (ocicapset.effective != JAIL_CAP_ALL) {
+		ud[0].effective = (uint32_t)(ocicapset.effective & 0xffffffffU);
+		ud[1].effective = (uint32_t)(ocicapset.effective >> 32);
+	}
+
+	if (ocicapset.permitted != JAIL_CAP_ALL) {
+		ud[0].permitted = (uint32_t)(ocicapset.permitted & 0xffffffffU);
+		ud[1].permitted = (uint32_t)(ocicapset.permitted >> 32);
+	}
+
+	if (ocicapset.inheritable != JAIL_CAP_ALL) {
+		ud[0].inheritable = (uint32_t)(ocicapset.inheritable & 0xffffffffU);
+		ud[1].inheritable = (uint32_t)(ocicapset.inheritable >> 32);
+	}
+
+	DEBUG("new capabilities: Pe=%08x%08x Pp=%08x%08x Pi=%08x%08x\n",
+	      ud[1].effective, ud[0].effective,
+	      ud[1].permitted, ud[0].permitted,
+	      ud[1].inheritable, ud[0].inheritable);
+
+	if (capset(&uh, ud)) {
+		ERROR("capset() failed\n");
+		return -1;
+	}
+
+	/* edit ambient set */
+	if (ocicapset.ambient != JAIL_CAP_ALL) {
+		for (cap = 0; cap <= CAP_LAST_CAP; cap++) {
+			is_set = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, cap, 0, 0);
+			if ( (ocicapset.ambient & (1LLU << cap)) == 0) {
+				if (is_set) {
+					DEBUG("dropping capability %s (%d) from ambient set\n", capabilities_names[cap], cap);
+					if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, cap, 0, 0)) {
+						err = errno;
+						ERROR("prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, %d, 0, 0) failed: %m\n", cap);
+						return err;
+					}
+				}
+			} else {
+				if (!is_set) {
+					DEBUG("raising capability %s (%d) to ambient set\n", capabilities_names[cap], cap);
+					if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0)) {
+						err = errno;
+						ERROR("prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, %d, 0, 0) failed: %m\n", cap);
+						return err;
+					}
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
int drop_capabilities(const char *file)
{
enum {
if (capdrop == 0LLU) {
DEBUG("cap.keep empty -> only dropping capabilities from cap.drop (blacklist)\n");
- capdrop = 0xffffffffffffffffLLU;
+ capdrop = JAIL_CAP_ALL;
} else {
DEBUG("cap.keep has at least one capability -> dropping every capabilities not in cap.keep (whitelist)\n");
}
#ifndef _JAIL_CAPABILITIES_H_
#define _JAIL_CAPABILITIES_H_
+#include <libubox/blobmsg.h>
+
+struct jail_capset { /* capability masks parsed from an OCI spec; bit N stands for capability number N, an all-ones mask means the set was not specified */
+ uint64_t bounding;
+ uint64_t effective;
+ uint64_t inheritable;
+ uint64_t permitted;
+ uint64_t ambient;
+ uint8_t apply; /* set to 1 by parseOCIcapabilities() on success; applyOCIcapabilities() does nothing while it is 0 */
+};
+
int drop_capabilities(const char *file);
+int parseOCIcapabilities(struct jail_capset *capset, struct blob_attr *msg);
+int applyOCIcapabilities(struct jail_capset capset);
+
#endif
#include <libgen.h>
#include <sched.h>
#include <linux/limits.h>
+#include <linux/filter.h>
#include <signal.h>
#include "capabilities.h"
#include "fs.h"
#include "jail.h"
#include "log.h"
+#include "seccomp-oci.h"
+#include <libubox/utils.h>
+#include <libubox/blobmsg.h>
+#include <libubox/blobmsg_json.h>
+#include <libubox/list.h>
+#include <libubox/vlist.h>
#include <libubox/uloop.h>
#include <libubus.h>
#define STACK_SIZE (1024 * 1024)
-#define OPT_ARGS "S:C:n:h:r:w:d:psulocU:G:NR:fFO:T:Ey"
+#define OPT_ARGS "S:C:n:h:r:w:d:psulocU:G:NR:fFO:T:EyJ:"
static struct {
char *name;
char *hostname;
char **jail_argv;
+ char *cwd;
char *seccomp;
+ struct sock_fprog *ociseccomp;
char *capabilities;
+ struct jail_capset capset;
char *user;
char *group;
char *extroot;
char *overlaydir;
char *tmpoverlaysize;
+ char **envp;
+ char *uidmap;
+ char *gidmap;
int no_new_privs;
int namespace;
int procfs;
int require_jail;
} opts;
+static struct blob_buf ocibuf;
extern int pivot_root(const char *new_root, const char *put_old);
}
static int mount_overlay(char *jail_root, char *overlaydir) {
- char *upperdir, *workdir, *optsstr;
+ char *upperdir, *workdir, *optsstr, *upperetc, *upperresolvconf;
const char mountoptsformat[] = "lowerdir=%s,upperdir=%s,workdir=%s";
- int ret = -1;
+ int ret = -1, fd;
if (asprintf(&upperdir, "%s%s", overlaydir, "/upper") < 0)
goto out;
if (mkdir_p(upperdir, 0755) || mkdir_p(workdir, 0755))
goto opts_printf;
+/*
+ * make sure /etc/resolv.conf exists in overlay and is owned by jail userns root
+ * this is to work-around a bug in overlayfs described in the overlayfs-userns
+ * patch:
+ * 3. modification of a file 'hithere' which is in l but not yet
+ * in u, and which is not owned by T, is not allowed, even if
+ * writes to u are allowed. This may be a bug in overlayfs,
+ * but it is safe behavior.
+ */
+ if (asprintf(&upperetc, "%s/etc", upperdir) < 0)
+ goto opts_printf;
+
+ if (mkdir_p(upperetc, 0755))
+ goto upper_etc_printf;
+
+ if (asprintf(&upperresolvconf, "%s/resolv.conf", upperetc) < 0)
+ goto upper_etc_printf;
+
+ fd = creat(upperresolvconf, 0644);
+ if (fd == -1) {
+ ERROR("creat(%s) failed: %m\n", upperresolvconf);
+ goto upper_resolvconf_printf;
+ }
+ close(fd);
+
DEBUG("mount -t overlay %s %s (%s)\n", jail_root, jail_root, optsstr);
if (mount(jail_root, jail_root, "overlay", MS_NOATIME, optsstr))
ret = 0;
+upper_resolvconf_printf:
+ free(upperresolvconf);
+upper_etc_printf:
+ free(upperetc);
opts_printf:
free(optsstr);
work_printf:
return 0;
}
-static int write_uid_gid_map(pid_t child_pid, bool gidmap, int id)
+/*
+ * Write a prepared mapping string to /proc/<child_pid>/uid_map, or to
+ * gid_map when gidmap is true.
+ *
+ * mapstr is freed once it has been written successfully; on failure it is
+ * left untouched. Returns 0 on success and -1 on any error.
+ */
+static int write_uid_gid_map(pid_t child_pid, bool gidmap, char *mapstr)
+{
+	int map_file;
+	char map_path[64];
+
+	if (snprintf(map_path, sizeof(map_path), "/proc/%d/%s",
+		child_pid, gidmap?"gid_map":"uid_map") < 0)
+		return -1;
+
+	if ((map_file = open(map_path, O_WRONLY)) == -1)
+		return -1;
+
+	/* dprintf() returns the number of bytes written; only < 0 is an error */
+	if (dprintf(map_file, "%s", mapstr) < 0) {
+		close(map_file);
+		return -1;
+	}
+
+	close(map_file);
+	free(mapstr);
+	return 0;
+}
+
+static int write_single_uid_gid_map(pid_t child_pid, bool gidmap, int id)
{
int map_file;
char map_path[64];
return -1;
}
- if (dprintf(setgroups_file, allow?"allow":"deny") == -1) {
+ if (dprintf(setgroups_file, "%s", allow?"allow":"deny") == -1) {
close(setgroups_file);
return -1;
}
static void set_jail_user(int pw_uid, int user_gid, int gr_gid)
{
- if ((user_gid != -1) && initgroups(opts.user, user_gid)) {
+ if (opts.user && (user_gid != -1) && initgroups(opts.user, user_gid)) {
ERROR("failed to initgroups() for user %s: %m\n", opts.user);
exit(EXIT_FAILURE);
}
}
#define MAX_ENVP 8
-static char** build_envp(const char *seccomp)
+static char** build_envp(const char *seccomp, char **ocienvp)
{
static char *envp[MAX_ENVP];
static char preload_var[PATH_MAX];
static char debug_var[] = "LD_DEBUG=all";
static char container_var[] = "container=ujail";
const char *preload_lib = find_lib("libpreload-seccomp.so");
+ char **addenv;
+
int count = 0;
if (seccomp && !preload_lib) {
if (debug > 1)
envp[count++] = debug_var;
+ addenv = ocienvp;
+ while (addenv && *addenv) {
+ envp[count++] = *(addenv++);
+ if (count >= MAX_ENVP) {
+ ERROR("environment limited to %d extra records, truncating\n", MAX_ENVP);
+ break;
+ }
+ }
return envp;
}
fprintf(stderr, " -T <size>\tuse tmpfs r/w overlayfs with <size>\n");
fprintf(stderr, " -E\t\tfail if jail cannot be setup\n");
fprintf(stderr, " -y\t\tprovide jail console\n");
+ fprintf(stderr, " -J <dir>\tstart OCI bundle\n");
fprintf(stderr, "\nWarning: by default root inside the jail is the same\n\
and he has the same powers as root outside the jail,\n\
thus he can escape the jail and/or break stuff.\n\
close(pipes[2]);
if (opts.namespace & CLONE_NEWUSER) {
- if (setgid(0) < 0) {
+ if (setregid(0, 0) < 0) {
ERROR("setgid\n");
exit(EXIT_FAILURE);
}
- if (setuid(0) < 0) {
+ if (setreuid(0, 0) < 0) {
ERROR("setuid\n");
exit(EXIT_FAILURE);
}
-// if (setgroups(0, NULL) < 0) {
-// ERROR("setgroups\n");
-// exit(EXIT_FAILURE);
-// }
+ if (setgroups(0, NULL) < 0) {
+ ERROR("setgroups\n");
+ exit(EXIT_FAILURE);
+ }
}
if (opts.namespace && opts.hostname && strlen(opts.hostname) > 0
exit(EXIT_FAILURE);
}
+ if (applyOCIcapabilities(opts.capset))
+ exit(EXIT_FAILURE);
+
if (opts.capabilities && drop_capabilities(opts.capabilities))
exit(EXIT_FAILURE);
if (!(opts.namespace & CLONE_NEWUSER)) {
get_jail_user(&pw_uid, &pw_gid, &gr_gid);
- set_jail_user(pw_uid, pw_gid, gr_gid);
+
+ set_jail_user(opts.pw_uid?:pw_uid, opts.pw_gid?:pw_gid, opts.gr_gid?:gr_gid);
}
- char **envp = build_envp(opts.seccomp);
+ char **envp = build_envp(opts.seccomp, opts.envp);
if (!envp)
exit(EXIT_FAILURE);
+ if (opts.ociseccomp && applyOCIlinuxseccomp(opts.ociseccomp))
+ exit(EXIT_FAILURE);
+
INFO("exec-ing %s\n", *opts.jail_argv);
execve(*opts.jail_argv, opts.jail_argv, envp);
/* we get there only if execve fails */
ubus_free(ctx);
}
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCIroot() */
+ OCI_ROOT_PATH,
+ OCI_ROOT_READONLY,
+ __OCI_ROOT_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_root_policy[] = { /* keys accepted inside the OCI "root" table */
+ [OCI_ROOT_PATH] = { "path", BLOBMSG_TYPE_STRING },
+ [OCI_ROOT_READONLY] = { "readonly", BLOBMSG_TYPE_BOOL },
+};
+
+/*
+ * Parse the OCI "root" table.
+ *
+ * opts.extroot is set to the directory part of jsonfile followed by the
+ * "path" value; the string lives in a static buffer. opts.ronly is only
+ * updated when "readonly" is present.
+ *
+ * Returns 0 on success, ENODATA when "path" is missing, ENOTDIR when
+ * jsonfile contains no '/', and ENAMETOOLONG when the resulting path does
+ * not fit into PATH_MAX.
+ */
+static int parseOCIroot(const char *jsonfile, struct blob_attr *msg)
+{
+	static char rootpath[PATH_MAX] = { 0 };
+	struct blob_attr *tb[__OCI_ROOT_MAX];
+	const char *path;
+	char *cur;
+
+	blobmsg_parse(oci_root_policy, __OCI_ROOT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_ROOT_PATH])
+		return ENODATA;
+
+	/* refuse over-long input instead of ending up with an unterminated buffer */
+	if (strlen(jsonfile) >= sizeof(rootpath))
+		return ENAMETOOLONG;
+
+	strcpy(rootpath, jsonfile);
+	cur = strrchr(rootpath, '/');
+
+	if (!cur)
+		return ENOTDIR;
+
+	/* cut off the file name, keep the trailing slash */
+	*(++cur) = '\0';
+
+	/* a silently truncated root directory would point somewhere else */
+	path = blobmsg_get_string(tb[OCI_ROOT_PATH]);
+	if (strlen(rootpath) + strlen(path) >= sizeof(rootpath))
+		return ENAMETOOLONG;
+
+	strcat(rootpath, path);
+
+	opts.extroot = rootpath;
+
+	/* "readonly" is optional, the bool getter must not see a NULL attribute */
+	if (tb[OCI_ROOT_READONLY])
+		opts.ronly = blobmsg_get_bool(tb[OCI_ROOT_READONLY]);
+
+	return 0;
+}
+
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCImount() */
+ OCI_MOUNT_SOURCE,
+ OCI_MOUNT_DESTINATION,
+ OCI_MOUNT_TYPE,
+ OCI_MOUNT_OPTIONS,
+ __OCI_MOUNT_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_mount_policy[] = { /* keys accepted in one element of the OCI "mounts" array */
+ [OCI_MOUNT_SOURCE] = { "source", BLOBMSG_TYPE_STRING },
+ [OCI_MOUNT_DESTINATION] = { "destination", BLOBMSG_TYPE_STRING },
+ [OCI_MOUNT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+ [OCI_MOUNT_OPTIONS] = { "options", BLOBMSG_TYPE_ARRAY },
+};
+
+/*
+ * Parse one element of the OCI "mounts" array.
+ *
+ * Only three mounts are recognised: proc on /proc (sets opts.procfs), sysfs
+ * on /sys (sets opts.sysfs) and tmpfs on /dev (nothing to do). Any other
+ * mount is logged and ignored.
+ *
+ * Returns 0, or EINVAL when "destination" is missing.
+ */
+static int parseOCImount(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_MOUNT_MAX];
+	const char *source, *destination, *type;
+	char *options = NULL;
+
+	blobmsg_parse(oci_mount_policy, __OCI_MOUNT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_MOUNT_DESTINATION])
+		return EINVAL;
+
+	destination = blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]);
+
+	/*
+	 * "type" and "source" may be absent; use a placeholder that matches none
+	 * of the known types rather than handing NULL to strcmp()/printf().
+	 */
+	type = tb[OCI_MOUNT_TYPE] ? blobmsg_get_string(tb[OCI_MOUNT_TYPE]) : "(none)";
+	source = tb[OCI_MOUNT_SOURCE] ? blobmsg_get_string(tb[OCI_MOUNT_SOURCE]) : "(none)";
+
+	if (!strcmp("proc", type) && !strcmp("/proc", destination)) {
+		opts.procfs = true;
+		return 0;
+	}
+
+	if (!strcmp("sysfs", type) && !strcmp("/sys", destination)) {
+		opts.sysfs = true;
+		return 0;
+	}
+
+	if (!strcmp("tmpfs", type) && !strcmp("/dev", destination)) {
+		/* we always mount a small tmpfs on /dev */
+		return 0;
+	}
+
+	/* the formatted JSON is heap-allocated and has to be released again */
+	if (tb[OCI_MOUNT_OPTIONS])
+		options = blobmsg_format_json(tb[OCI_MOUNT_OPTIONS], true);
+
+	INFO("ignoring unsupported mount %s %s -t %s -o %s\n",
+	     source, destination, type, options ? options : "(none)");
+
+	free(options);
+
+	return 0;
+}
+
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCIprocessuser() */
+ OCI_PROCESS_USER_UID,
+ OCI_PROCESS_USER_GID,
+ OCI_PROCESS_USER_UMASK,
+ OCI_PROCESS_USER_ADDITIONALGIDS,
+ __OCI_PROCESS_USER_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_process_user_policy[] = { /* keys accepted inside the OCI process "user" table; parseOCIprocessuser() only acts on uid and gid */
+ [OCI_PROCESS_USER_UID] = { "uid", BLOBMSG_TYPE_INT32 },
+ [OCI_PROCESS_USER_GID] = { "gid", BLOBMSG_TYPE_INT32 },
+ [OCI_PROCESS_USER_UMASK] = { "umask", BLOBMSG_TYPE_INT32 },
+ [OCI_PROCESS_USER_ADDITIONALGIDS] = { "additionalGids", BLOBMSG_TYPE_ARRAY },
+};
+
+/*
+ * Parse the OCI process "user" table: "uid" goes to opts.pw_uid, "gid" goes
+ * to both opts.pw_gid and opts.gr_gid. Keys that are absent leave the
+ * corresponding option unchanged. Always returns 0.
+ */
+static int parseOCIprocessuser(struct blob_attr *msg) {
+	struct blob_attr *tb[__OCI_PROCESS_USER_MAX];
+	struct blob_attr *uid_attr, *gid_attr;
+
+	blobmsg_parse(oci_process_user_policy, __OCI_PROCESS_USER_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	uid_attr = tb[OCI_PROCESS_USER_UID];
+	gid_attr = tb[OCI_PROCESS_USER_GID];
+
+	if (uid_attr != NULL)
+		opts.pw_uid = blobmsg_get_u32(uid_attr);
+
+	if (gid_attr != NULL) {
+		/* the single OCI gid serves as primary and as group id */
+		opts.pw_gid = blobmsg_get_u32(gid_attr);
+		opts.gr_gid = blobmsg_get_u32(gid_attr);
+	}
+
+	/* ToDo: umask, additional GIDs */
+
+	return 0;
+}
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCIprocess() */
+ OCI_PROCESS_ARGS,
+ OCI_PROCESS_CAPABILITIES,
+ OCI_PROCESS_CWD,
+ OCI_PROCESS_ENV,
+ OCI_PROCESS_NONEWPRIVILEGES,
+ OCI_PROCESS_RLIMITS,
+ OCI_PROCESS_TERMINAL,
+ OCI_PROCESS_USER,
+ __OCI_PROCESS_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_process_policy[] = { /* keys accepted inside the OCI "process" table; "rlimits" is recognised but not acted on by parseOCIprocess() */
+ [OCI_PROCESS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
+ [OCI_PROCESS_CAPABILITIES] = { "capabilities", BLOBMSG_TYPE_TABLE },
+ [OCI_PROCESS_CWD] = { "cwd", BLOBMSG_TYPE_STRING },
+ [OCI_PROCESS_ENV] = { "env", BLOBMSG_TYPE_ARRAY },
+ [OCI_PROCESS_NONEWPRIVILEGES] = { "noNewPrivileges", BLOBMSG_TYPE_BOOL },
+ [OCI_PROCESS_RLIMITS] = { "rlimits", BLOBMSG_TYPE_ARRAY },
+ [OCI_PROCESS_TERMINAL] = { "terminal", BLOBMSG_TYPE_BOOL },
+ [OCI_PROCESS_USER] = { "user", BLOBMSG_TYPE_TABLE },
+};
+
+/*
+ * Parse the OCI "process" table into opts.
+ *
+ * "args" is mandatory and becomes opts.jail_argv (a NULL-terminated array
+ * whose strings point into the parsed message). "terminal",
+ * "noNewPrivileges" and "cwd" only override opts when present. "env" is
+ * copied into a NULL-terminated opts.envp. "user" and "capabilities" are
+ * handed to parseOCIprocessuser() and parseOCIcapabilities().
+ *
+ * Returns 0 on success, ENOENT when "args" is missing, ENODATA when it is
+ * empty, ENOMEM on allocation failure, or the error of a sub-parser.
+ */
+static int parseOCIprocess(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_PROCESS_MAX];
+	struct blob_attr *cur;
+	unsigned int sz = 0;
+	int rem;
+	int res;
+
+	blobmsg_parse(oci_process_policy, __OCI_PROCESS_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_PROCESS_ARGS])
+		return ENOENT;
+
+	blobmsg_for_each_attr(cur, tb[OCI_PROCESS_ARGS], rem)
+		++sz;
+
+	if (!sz)
+		return ENODATA;
+
+	/* one extra slot keeps the vector NULL-terminated for execve() */
+	opts.jail_argv = calloc(1 + sz, sizeof(char*));
+	if (!opts.jail_argv)
+		return ENOMEM;
+
+	sz = 0;
+	blobmsg_for_each_attr(cur, tb[OCI_PROCESS_ARGS], rem)
+		opts.jail_argv[sz++] = blobmsg_get_string(cur);
+
+	/* both flags are optional; the bool getter must not see a NULL attribute */
+	if (tb[OCI_PROCESS_TERMINAL])
+		opts.console = blobmsg_get_bool(tb[OCI_PROCESS_TERMINAL]);
+
+	if (tb[OCI_PROCESS_NONEWPRIVILEGES])
+		opts.no_new_privs = blobmsg_get_bool(tb[OCI_PROCESS_NONEWPRIVILEGES]);
+
+	if (tb[OCI_PROCESS_CWD])
+		opts.cwd = blobmsg_get_string(tb[OCI_PROCESS_CWD]);
+
+	sz = 0;
+	blobmsg_for_each_attr(cur, tb[OCI_PROCESS_ENV], rem)
+		++sz;
+
+	if (sz > 0) {
+		opts.envp = calloc(1 + sz, sizeof(char*));
+		if (!opts.envp)
+			return ENOMEM;
+	}
+
+	sz = 0;
+	blobmsg_for_each_attr(cur, tb[OCI_PROCESS_ENV], rem) {
+		opts.envp[sz] = strdup(blobmsg_get_string(cur));
+		if (!opts.envp[sz])
+			return ENOMEM;
+		++sz;
+	}
+
+	if (tb[OCI_PROCESS_USER] && (res = parseOCIprocessuser(tb[OCI_PROCESS_USER])))
+		return res;
+
+	if (tb[OCI_PROCESS_CAPABILITIES] &&
+	    (res = parseOCIcapabilities(&opts.capset, tb[OCI_PROCESS_CAPABILITIES])))
+		return res;
+
+	/* ToDo: rlimits */
+
+	return 0;
+}
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCIlinuxns() */
+ OCI_LINUX_NAMESPACE_TYPE,
+ OCI_LINUX_NAMESPACE_PATH,
+ __OCI_LINUX_NAMESPACE_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_linux_namespace_policy[] = { /* keys accepted in one element of the OCI linux "namespaces" array */
+ [OCI_LINUX_NAMESPACE_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+ [OCI_LINUX_NAMESPACE_PATH] = { "path", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Map an OCI namespace type name to its CLONE_NEW* flag.
+ * Unknown names yield 0.
+ */
+static unsigned int resolve_nstype(char *type) {
+	static const struct {
+		const char *name;
+		unsigned int flag;
+	} nstypes[] = {
+		{ "pid", CLONE_NEWPID },
+		{ "network", CLONE_NEWNET },
+		{ "mount", CLONE_NEWNS },
+		{ "ipc", CLONE_NEWIPC },
+		{ "uts", CLONE_NEWUTS },
+		{ "user", CLONE_NEWUSER },
+		{ "cgroup", CLONE_NEWCGROUP },
+	};
+	unsigned int idx;
+
+	for (idx = 0; idx < sizeof(nstypes) / sizeof(nstypes[0]); idx++) {
+		if (strcmp(nstypes[idx].name, type) == 0)
+			return nstypes[idx].flag;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse one element of the OCI linux "namespaces" array and OR the matching
+ * CLONE_NEW* flag into opts.namespace.
+ *
+ * Returns 0 on success, EINVAL when "type" is missing and ENOTSUP when a
+ * "path" is given (not implemented yet).
+ */
+static int parseOCIlinuxns(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_NAMESPACE_MAX];
+	struct blob_attr *type_attr;
+
+	blobmsg_parse(oci_linux_namespace_policy, __OCI_LINUX_NAMESPACE_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	type_attr = tb[OCI_LINUX_NAMESPACE_TYPE];
+	if (type_attr == NULL)
+		return EINVAL;
+
+	if (tb[OCI_LINUX_NAMESPACE_PATH] != NULL)
+		return ENOTSUP; /* ToDo */
+
+	opts.namespace |= resolve_nstype(blobmsg_get_string(type_attr));
+
+	return 0;
+}
+
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCIuidgidmappings() */
+ OCI_LINUX_UIDGIDMAP_CONTAINERID,
+ OCI_LINUX_UIDGIDMAP_HOSTID,
+ OCI_LINUX_UIDGIDMAP_SIZE,
+ __OCI_LINUX_UIDGIDMAP_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_linux_uidgidmap_policy[] = { /* keys of one uid/gid mapping entry; parseOCIuidgidmappings() requires all three */
+ [OCI_LINUX_UIDGIDMAP_CONTAINERID] = { "containerID", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_UIDGIDMAP_HOSTID] = { "hostID", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_UIDGIDMAP_SIZE] = { "size", BLOBMSG_TYPE_INT32 },
+};
+
+/*
+ * Turn an OCI "uidMappings"/"gidMappings" array into one string with a
+ * "<containerID> <hostID> <size>\n" line per entry and store it in
+ * opts.uidmap, or in opts.gidmap when is_gidmap is true. The string is
+ * heap-allocated.
+ *
+ * An empty or absent array leaves opts untouched and returns 0.
+ * Returns EINVAL when an entry lacks one of the three keys and ENOMEM on
+ * allocation failure; nothing is stored and nothing leaks in those cases.
+ */
+static int parseOCIuidgidmappings(struct blob_attr *msg, bool is_gidmap)
+{
+	/* the values are read as unsigned 32-bit numbers, so print them as such */
+	const char *map_format = "%u %u %u\n";
+	struct blob_attr *tb[__OCI_LINUX_UIDGIDMAP_MAX];
+	struct blob_attr *cur;
+	int rem, len;
+	char **mappings;
+	char *map;
+	unsigned int cnt = 0;
+	unsigned int i;
+	size_t totallen = 0;
+	int ret = 0;
+
+	/* count number of mappings */
+	blobmsg_for_each_attr(cur, msg, rem)
+		cnt++;
+
+	if (!cnt)
+		return 0;
+
+	/* allocate array for mappings */
+	mappings = calloc(1 + cnt, sizeof(char*));
+	if (!mappings)
+		return ENOMEM;
+
+	/* from here on cnt is the number of line strings allocated so far */
+	cnt = 0;
+	blobmsg_for_each_attr(cur, msg, rem) {
+		blobmsg_parse(oci_linux_uidgidmap_policy, __OCI_LINUX_UIDGIDMAP_MAX, tb, blobmsg_data(cur), blobmsg_len(cur));
+
+		if (!tb[OCI_LINUX_UIDGIDMAP_CONTAINERID] ||
+		    !tb[OCI_LINUX_UIDGIDMAP_HOSTID] ||
+		    !tb[OCI_LINUX_UIDGIDMAP_SIZE]) {
+			ret = EINVAL;
+			goto out;
+		}
+
+		/* write mapping line into allocated string */
+		len = asprintf(&mappings[cnt], map_format,
+			       blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_CONTAINERID]),
+			       blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_HOSTID]),
+			       blobmsg_get_u32(tb[OCI_LINUX_UIDGIDMAP_SIZE]));
+
+		if (len < 0) {
+			ret = ENOMEM;
+			goto out;
+		}
+
+		cnt++;
+		totallen += len;
+	}
+
+	/* the combined string has to hold every line plus the terminator */
+	map = calloc(1 + totallen, sizeof(char));
+	if (!map) {
+		ret = ENOMEM;
+		goto out;
+	}
+
+	/* concatenate mapping strings into combined string */
+	for (i = 0; i < cnt; i++)
+		strcat(map, mappings[i]);
+
+	if (is_gidmap)
+		opts.gidmap = map;
+	else
+		opts.uidmap = map;
+
+out:
+	for (i = 0; i < cnt; i++)
+		free(mappings[i]);
+	free(mappings);
+
+	return ret;
+}
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCIlinux() */
+ OCI_LINUX_RESOURCES,
+ OCI_LINUX_SECCOMP,
+ OCI_LINUX_SYSCTL,
+ OCI_LINUX_NAMESPACES,
+ OCI_LINUX_UIDMAPPINGS,
+ OCI_LINUX_GIDMAPPINGS,
+ OCI_LINUX_MASKEDPATHS,
+ OCI_LINUX_READONLYPATHS,
+ OCI_LINUX_ROOTFSPROPAGATION,
+ __OCI_LINUX_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_linux_policy[] = { /* keys accepted inside the OCI "linux" table; parseOCIlinux() acts on namespaces, uid/gid mappings and seccomp only */
+ [OCI_LINUX_RESOURCES] = { "resources", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_SECCOMP] = { "seccomp", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_SYSCTL] = { "sysctl", BLOBMSG_TYPE_TABLE },
+ [OCI_LINUX_NAMESPACES] = { "namespaces", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_UIDMAPPINGS] = { "uidMappings", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_GIDMAPPINGS] = { "gidMappings", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_MASKEDPATHS] = { "maskedPaths", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_READONLYPATHS] = { "readonlyPaths", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_ROOTFSPROPAGATION] = { "rootfsPropagation", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Parse the OCI "linux" table: namespaces are ORed into opts.namespace,
+ * uid/gid mappings are stored in opts.uidmap / opts.gidmap and a seccomp
+ * table is compiled into opts.ociseccomp.
+ *
+ * Returns 0 on success, the error of the failing sub-parser, or EINVAL when
+ * the seccomp table cannot be parsed.
+ */
+static int parseOCIlinux(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_MAX];
+	struct blob_attr *cur;
+	int rem;
+	int res = 0;
+
+	blobmsg_parse(oci_linux_policy, __OCI_LINUX_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (tb[OCI_LINUX_NAMESPACES]) {
+		blobmsg_for_each_attr(cur, tb[OCI_LINUX_NAMESPACES], rem) {
+			res = parseOCIlinuxns(cur);
+			if (res)
+				return res;
+		}
+	}
+
+	if (tb[OCI_LINUX_UIDMAPPINGS]) {
+		/* the uid map has to come from "uidMappings", not from the gid table */
+		res = parseOCIuidgidmappings(tb[OCI_LINUX_UIDMAPPINGS], 0);
+		if (res)
+			return res;
+	}
+
+	if (tb[OCI_LINUX_GIDMAPPINGS]) {
+		res = parseOCIuidgidmappings(tb[OCI_LINUX_GIDMAPPINGS], 1);
+		if (res)
+			return res;
+	}
+
+	if (tb[OCI_LINUX_SECCOMP]) {
+		opts.ociseccomp = parseOCIlinuxseccomp(tb[OCI_LINUX_SECCOMP]);
+		if (!opts.ociseccomp)
+			return EINVAL;
+	}
+
+	return 0;
+}
+
+enum { /* indices into tb[] as filled by blobmsg_parse() in parseOCI() */
+ OCI_VERSION,
+ OCI_HOSTNAME,
+ OCI_PROCESS,
+ OCI_ROOT,
+ OCI_MOUNTS,
+ OCI_LINUX,
+ __OCI_MAX, /* number of entries, sizes tb[] */
+};
+
+static const struct blobmsg_policy oci_policy[] = { /* top-level keys read from the OCI config.json */
+ [OCI_VERSION] = { "ociVersion", BLOBMSG_TYPE_STRING },
+ [OCI_HOSTNAME] = { "hostname", BLOBMSG_TYPE_STRING },
+ [OCI_PROCESS] = { "process", BLOBMSG_TYPE_TABLE },
+ [OCI_ROOT] = { "root", BLOBMSG_TYPE_TABLE },
+ [OCI_MOUNTS] = { "mounts", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX] = { "linux", BLOBMSG_TYPE_TABLE },
+};
+
+/*
+ * Load the OCI runtime configuration from jsonfile into the global ocibuf
+ * and translate it into opts.
+ *
+ * "ociVersion" must start with "1.0"; "process", "root" and "mounts" are
+ * required, "hostname" and "linux" are optional.
+ *
+ * Returns 0 on success, ENOENT when the file cannot be loaded, ENOMSG when
+ * "ociVersion" is missing, ENOTSUP for an unsupported version, ENODATA when
+ * a required section is missing, or the error of the failing sub-parser.
+ */
+static int parseOCI(const char *jsonfile)
+{
+	struct blob_attr *tb[__OCI_MAX];
+	struct blob_attr *mnt;
+	char *version;
+	int left;
+	int ret;
+
+	blob_buf_init(&ocibuf, 0);
+	if (!blobmsg_add_json_from_file(&ocibuf, jsonfile))
+		return ENOENT;
+
+	blobmsg_parse(oci_policy, __OCI_MAX, tb, blob_data(ocibuf.head), blob_len(ocibuf.head));
+
+	if (tb[OCI_VERSION] == NULL)
+		return ENOMSG;
+
+	/* only the first three characters of the version are compared */
+	version = blobmsg_get_string(tb[OCI_VERSION]);
+	if (strncmp("1.0", version, 3) != 0) {
+		ERROR("unsupported ociVersion %s\n", version);
+		return ENOTSUP;
+	}
+
+	if (tb[OCI_HOSTNAME] != NULL)
+		opts.hostname = blobmsg_get_string(tb[OCI_HOSTNAME]);
+
+	if (tb[OCI_PROCESS] == NULL)
+		return ENODATA;
+
+	ret = parseOCIprocess(tb[OCI_PROCESS]);
+	if (ret != 0)
+		return ret;
+
+	if (tb[OCI_ROOT] == NULL)
+		return ENODATA;
+
+	ret = parseOCIroot(jsonfile, tb[OCI_ROOT]);
+	if (ret != 0)
+		return ret;
+
+	if (tb[OCI_MOUNTS] == NULL)
+		return ENODATA;
+
+	blobmsg_for_each_attr(mnt, tb[OCI_MOUNTS], left) {
+		ret = parseOCImount(mnt);
+		if (ret != 0)
+			return ret;
+	}
+
+	if (tb[OCI_LINUX] != NULL)
+		return parseOCIlinux(tb[OCI_LINUX]);
+
+	return 0;
+}
+
int main(int argc, char **argv)
{
sigset_t sigmask;
uid_t uid = getuid();
- char log[] = "/dev/log";
- char ubus[] = "/var/run/ubus.sock";
+ const char log[] = "/dev/log";
+ const char ubus[] = "/var/run/ubus.sock";
+ char *jsonfile = NULL;
int ch, i;
int pipes[4];
char sig_buf[1];
case 'y':
opts.console = 1;
break;
+ case 'J':
+ asprintf(&jsonfile, "%s/config.json", optarg);
+ break;
}
}
if (opts.namespace)
opts.namespace |= CLONE_NEWIPC | CLONE_NEWPID;
+ if (jsonfile) {
+ int ocires;
+ ocires = parseOCI(jsonfile);
+ free(jsonfile);
+ if (ocires) {
+ ERROR("parsing of OCI JSON spec has failed: %s (%d)\n", strerror(ocires), ocires);
+ return ocires;
+ }
+ }
+
if (opts.tmpoverlaysize && strlen(opts.tmpoverlaysize) > 8) {
ERROR("size parameter too long: \"%s\"\n", opts.tmpoverlaysize);
return -1;
}
/* no <binary> param found */
- if (argc - optind < 1) {
+ if (!jsonfile && (argc - optind < 1)) {
usage();
return EXIT_FAILURE;
}
}
DEBUG("Using namespaces(0x%08x), capabilities(%d), seccomp(%d)\n",
opts.namespace,
- opts.capabilities != 0,
- opts.seccomp != 0);
-
- opts.jail_argv = &argv[optind];
+ opts.capabilities != 0 || opts.capset.apply,
+ opts.seccomp != 0 || opts.ociseccomp != 0);
- get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid);
+ if (!jsonfile) {
+ opts.jail_argv = &argv[optind];
+ if (opts.namespace & CLONE_NEWUSER)
+ get_jail_user(&opts.pw_uid, &opts.pw_gid, &opts.gr_gid);
+ }
if (!opts.extroot) {
if (opts.namespace && add_path_and_deps(*opts.jail_argv, 1, -1, 0)) {
}
close(pipes[0]);
if (opts.namespace & CLONE_NEWUSER) {
- bool has_gr = (opts.gr_gid != -1);
- if (write_setgroups(jail_process.pid, false)) {
+ if (write_setgroups(jail_process.pid, true)) {
ERROR("can't write setgroups\n");
return -1;
}
- if (opts.pw_uid != -1) {
- write_uid_gid_map(jail_process.pid, 0, opts.pw_uid);
- write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid);
+ if (!opts.uidmap) {
+ bool has_gr = (opts.gr_gid != -1);
+ if (opts.pw_uid != -1) {
+ write_single_uid_gid_map(jail_process.pid, 0, opts.pw_uid);
+ write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:opts.pw_gid);
+ } else {
+ write_single_uid_gid_map(jail_process.pid, 0, 65534);
+ write_single_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534);
+ }
} else {
- write_uid_gid_map(jail_process.pid, 0, 65534);
- write_uid_gid_map(jail_process.pid, 1, has_gr?opts.gr_gid:65534);
+ write_uid_gid_map(jail_process.pid, 0, opts.uidmap);
+ if (opts.gidmap)
+ write_uid_gid_map(jail_process.pid, 1, opts.gidmap);
}
}
#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
#define SECCOMP_RET_LOG 0x00070000U
+#define SECCOMP_RET_LOGALLOW 0x7ffc0000U
#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+#define SECCOMP_RET_KILLPROCESS 0x80000000U
#define SECCOMP_RET_ERROR(x) (SECCOMP_RET_ERRNO | ((x) & 0x0000ffffU))
#define SECCOMP_RET_LOGGER(x) (SECCOMP_RET_LOG | ((x) & 0x0000ffffU))
--- /dev/null
+/*
+ * parse and setup OCI seccomp filter
+ * seccomp example with syscall reporting
+ * Authors:
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#define _GNU_SOURCE 1
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+
+#include <libubox/utils.h>
+#include <libubox/blobmsg.h>
+#include <libubox/blobmsg_json.h>
+
+#include "log.h"
+#include "seccomp-bpf.h"
+#include "seccomp-oci.h"
+#include "../syscall-names.h"
+#include "seccomp-syscalls-helpers.h"
+
+/*
+ * Translate an OCI seccomp action name ("SCMP_ACT_*") into the matching
+ * SECCOMP_RET_* filter return value.
+ *
+ * "SCMP_ACT_ERRNO" and "SCMP_ACT_ERROR" both yield the bare
+ * SECCOMP_RET_ERRNO; the caller is expected to merge in the errno value.
+ * "SCMP_ACT_LOG" maps to SECCOMP_RET_LOGALLOW.
+ *
+ * A NULL or unrecognised name is logged and resolved to SECCOMP_RET_KILL,
+ * so a broken configuration never results in a more permissive filter.
+ */
+static uint32_t resolve_action(const char *actname)
+{
+	/* blobmsg_get_string() hands back NULL for a missing attribute */
+	if (!actname) {
+		ERROR("unknown seccomp action %s\n", "(null)");
+		return SECCOMP_RET_KILL;
+	}
+
+	if (!strcmp(actname, "SCMP_ACT_KILL"))
+		return SECCOMP_RET_KILL;
+	if (!strcmp(actname, "SCMP_ACT_KILL_PROCESS"))
+		return SECCOMP_RET_KILLPROCESS;
+	if (!strcmp(actname, "SCMP_ACT_TRAP"))
+		return SECCOMP_RET_TRAP;
+	if (!strcmp(actname, "SCMP_ACT_ERRNO"))
+		return SECCOMP_RET_ERRNO;
+	if (!strcmp(actname, "SCMP_ACT_ERROR"))
+		return SECCOMP_RET_ERRNO;
+	if (!strcmp(actname, "SCMP_ACT_TRACE"))
+		return SECCOMP_RET_TRACE;
+	if (!strcmp(actname, "SCMP_ACT_ALLOW"))
+		return SECCOMP_RET_ALLOW;
+	if (!strcmp(actname, "SCMP_ACT_LOG"))
+		return SECCOMP_RET_LOGALLOW;
+
+	ERROR("unknown seccomp action %s\n", actname);
+	return SECCOMP_RET_KILL;
+}
+
+/*
+ * Map an OCI architecture name ("SCMP_ARCH_*") to its AUDIT_ARCH_* value.
+ *
+ * Returns 0 for "SCMP_ARCH_X32" (without logging) and for every name that
+ * is not in the table (after logging an error).
+ */
+static uint32_t resolve_architecture(char *archname)
+{
+	static const struct {
+		const char *scmp_name;
+		uint32_t audit_arch;
+	} known_archs[] = {
+		{ "SCMP_ARCH_X86", AUDIT_ARCH_I386 },
+		{ "SCMP_ARCH_X86_64", AUDIT_ARCH_X86_64 },
+		/*
+		 * would be AUDIT_ARCH_X86_64, but 32-bit userland on a
+		 * 64-bit kernel is not supported yet
+		 */
+		{ "SCMP_ARCH_X32", 0 },
+		{ "SCMP_ARCH_ARM", AUDIT_ARCH_ARM },
+		{ "SCMP_ARCH_AARCH64", AUDIT_ARCH_AARCH64 },
+		{ "SCMP_ARCH_MIPS", AUDIT_ARCH_MIPS },
+		{ "SCMP_ARCH_MIPS64", AUDIT_ARCH_MIPS64 },
+		{ "SCMP_ARCH_MIPS64N32", AUDIT_ARCH_MIPS64N32 },
+		{ "SCMP_ARCH_MIPSEL", AUDIT_ARCH_MIPSEL },
+		{ "SCMP_ARCH_MIPSEL64", AUDIT_ARCH_MIPSEL64 },
+		{ "SCMP_ARCH_MIPSEL64N32", AUDIT_ARCH_MIPSEL64N32 },
+		{ "SCMP_ARCH_PPC", AUDIT_ARCH_PPC },
+		{ "SCMP_ARCH_PPC64", AUDIT_ARCH_PPC64 },
+		{ "SCMP_ARCH_PPC64LE", AUDIT_ARCH_PPC64LE },
+		{ "SCMP_ARCH_S390", AUDIT_ARCH_S390 },
+		{ "SCMP_ARCH_S390X", AUDIT_ARCH_S390X },
+		{ "SCMP_ARCH_PARISC", AUDIT_ARCH_PARISC },
+		{ "SCMP_ARCH_PARISC64", AUDIT_ARCH_PARISC64 },
+	};
+	size_t pos;
+
+	for (pos = 0; pos < sizeof(known_archs) / sizeof(known_archs[0]); pos++) {
+		if (strcmp(archname, known_archs[pos].scmp_name) == 0)
+			return known_archs[pos].audit_arch;
+	}
+
+	ERROR("unknown seccomp architecture %s\n", archname);
+	return 0;
+}
+
+enum { /* slots of the attribute table blobmsg_parse() fills from oci_linux_seccomp_policy */
+ OCI_LINUX_SECCOMP_DEFAULTACTION, /* "defaultAction" */
+ OCI_LINUX_SECCOMP_ARCHITECTURES, /* "architectures" */
+ OCI_LINUX_SECCOMP_FLAGS, /* "flags"; parsed, but not read by parseOCIlinuxseccomp() */
+ OCI_LINUX_SECCOMP_SYSCALLS, /* "syscalls" */
+ __OCI_LINUX_SECCOMP_MAX, /* number of slots, sizes the attribute table */
+};
+
+static const struct blobmsg_policy oci_linux_seccomp_policy[] = { /* JSON key and expected blobmsg type for each slot */
+ [OCI_LINUX_SECCOMP_DEFAULTACTION] = { "defaultAction", BLOBMSG_TYPE_STRING },
+ [OCI_LINUX_SECCOMP_ARCHITECTURES] = { "architectures", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_FLAGS] = { "flags", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_SYSCALLS] = { "syscalls", BLOBMSG_TYPE_ARRAY },
+};
+
+enum { /* slots for one element of the "syscalls" array */
+ OCI_LINUX_SECCOMP_SYSCALLS_NAMES, /* "names" */
+ OCI_LINUX_SECCOMP_SYSCALLS_ACTION, /* "action" */
+ OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET, /* "errnoRet" */
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS, /* "args" */
+ __OCI_LINUX_SECCOMP_SYSCALLS_MAX /* number of slots, sizes the attribute table */
+};
+
+static const struct blobmsg_policy oci_linux_seccomp_syscalls_policy[] = { /* designated initializers, so the order here need not follow the enum */
+ [OCI_LINUX_SECCOMP_SYSCALLS_NAMES] = { "names", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET] = { "errnoRet", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS] = { "args", BLOBMSG_TYPE_ARRAY },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ACTION] = { "action", BLOBMSG_TYPE_STRING },
+};
+
+enum { /* slots for one element of a syscall rule's "args" array */
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX, /* "index" */
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE, /* "value" */
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO, /* "valueTwo" */
+ OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP, /* "op" */
+ __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX /* number of slots, sizes the attribute table */
+};
+
+static const struct blobmsg_policy oci_linux_seccomp_syscalls_args_policy[] = { /* parsed by parseOCIlinuxseccomp(), which does not act on the values yet */
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_INDEX] = { "index", BLOBMSG_TYPE_INT32 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUE] = { "value", BLOBMSG_TYPE_INT64 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_VALUETWO] = { "valueTwo", BLOBMSG_TYPE_INT64 },
+ [OCI_LINUX_SECCOMP_SYSCALLS_ARGS_OP] = { "op", BLOBMSG_TYPE_STRING },
+};
+
+/*
+ * Build a seccomp BPF program from an OCI seccomp configuration object.
+ *
+ * @msg: blobmsg table holding "defaultAction" (required) and the optional
+ *       "architectures", "flags" and "syscalls" members.
+ *
+ * Generated program layout:
+ *   - 3 instructions: load the architecture, kill unless it equals ARCH_NR
+ *   - 1 instruction:  load the syscall number
+ *   - 2 instructions per listed syscall name: compare, return rule action
+ *   - 1 instruction:  return the default action
+ *
+ * If "architectures" is present, at least one entry has to resolve to
+ * ARCH_NR; if it is absent no configuration-time check is made (the filter
+ * itself still checks the architecture at run time).
+ *
+ * An ERRNO action without "errnoRet" (default or per rule) returns EPERM.
+ * Per-rule "args" conditions are parsed but NOT enforced yet: a rule with
+ * conditions currently applies to every invocation of the named syscalls.
+ *
+ * Returns a heap-allocated program (struct and filter array, both owned by
+ * the caller), or NULL on any parse or allocation error.
+ */
+struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_LINUX_SECCOMP_MAX];
+	struct blob_attr *tbn[__OCI_LINUX_SECCOMP_SYSCALLS_MAX];
+	struct blob_attr *tba[__OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX];
+	struct blob_attr *cur, *curn;
+	int rem, remn, sc;
+	struct sock_filter *filter;
+	struct sock_fprog *prog;
+	/* fixed part: 3 (arch check) + 1 (load syscall nr) + 1 (default return) */
+	int sz = 5, idx = 0;
+	uint32_t default_policy;
+
+	if (!msg) {
+		ERROR("seccomp: no configuration given\n");
+		return NULL;
+	}
+
+	blobmsg_parse(oci_linux_seccomp_policy, __OCI_LINUX_SECCOMP_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_LINUX_SECCOMP_DEFAULTACTION]) {
+		ERROR("seccomp: no default action set\n");
+		return NULL;
+	}
+
+	default_policy = resolve_action(blobmsg_get_string(tb[OCI_LINUX_SECCOMP_DEFAULTACTION]));
+	/*
+	 * a bare SECCOMP_RET_ERRNO carries errno 0, which would make every
+	 * filtered syscall look successful; use EPERM like the per-rule case
+	 */
+	if (default_policy == SECCOMP_RET_ERRNO)
+		default_policy = SECCOMP_RET_ERROR(EPERM);
+
+	/* the native architecture has to be among the listed ones, if any are listed */
+	if (tb[OCI_LINUX_SECCOMP_ARCHITECTURES]) {
+		int arch_matched = 0;
+
+		blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_ARCHITECTURES], rem) {
+			if (resolve_architecture(blobmsg_get_string(cur)) == ARCH_NR) {
+				arch_matched = 1;
+				break;
+			}
+		}
+
+		if (!arch_matched) {
+			ERROR("seccomp architecture doesn't match system\n");
+			return NULL;
+		}
+	}
+
+	/* first pass: count instructions, two per syscall name */
+	blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
+		blobmsg_parse(oci_linux_seccomp_syscalls_policy, __OCI_LINUX_SECCOMP_SYSCALLS_MAX, tbn, blobmsg_data(cur), blobmsg_len(cur));
+		blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn)
+			sz += 2;
+	}
+
+	/* sock_fprog.len is an unsigned short; refuse instead of truncating */
+	if (sz > BPF_MAXINSNS) {
+		ERROR("seccomp: filter needs %d instructions, limit is %d\n", sz, BPF_MAXINSNS);
+		return NULL;
+	}
+
+	prog = malloc(sizeof(*prog));
+	if (!prog)
+		return NULL;
+
+	filter = calloc(sz, sizeof(*filter));
+	if (!filter) {
+		ERROR("failed to allocate memory for seccomp filter\n");
+		goto errout2;
+	}
+
+	/* validate arch */
+	set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, arch_nr);
+	set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 1, 0, ARCH_NR);
+	set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, SECCOMP_RET_KILL);
+
+	/* get syscall */
+	set_filter(&filter[idx++], BPF_LD + BPF_W + BPF_ABS, 0, 0, syscall_nr);
+
+	/* second pass: emit the rules */
+	blobmsg_for_each_attr(cur, tb[OCI_LINUX_SECCOMP_SYSCALLS], rem) {
+		uint32_t action;
+
+		blobmsg_parse(oci_linux_seccomp_syscalls_policy, __OCI_LINUX_SECCOMP_SYSCALLS_MAX, tbn, blobmsg_data(cur), blobmsg_len(cur));
+
+		if (!tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION]) {
+			ERROR("seccomp: syscall rule without action\n");
+			goto errout1;
+		}
+
+		action = resolve_action(blobmsg_get_string(tbn[OCI_LINUX_SECCOMP_SYSCALLS_ACTION]));
+		if (tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]) {
+			if (action != SECCOMP_RET_ERRNO) {
+				ERROR("seccomp: errnoRet is only valid for SCMP_ACT_ERRNO\n");
+				goto errout1;
+			}
+
+			action = SECCOMP_RET_ERROR(blobmsg_get_u32(tbn[OCI_LINUX_SECCOMP_SYSCALLS_ERRNORET]));
+		} else if (action == SECCOMP_RET_ERRNO) {
+			action = SECCOMP_RET_ERROR(EPERM);
+		}
+
+		blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_NAMES], remn) {
+			sc = find_syscall(blobmsg_get_string(curn));
+			if (sc == -1) {
+				ERROR("unknown syscall '%s'\n", blobmsg_get_string(curn));
+				goto errout1;
+			}
+
+			/* add rule to filter: skip the return unless the number matches */
+			set_filter(&filter[idx++], BPF_JMP + BPF_JEQ + BPF_K, 0, 1, sc);
+			set_filter(&filter[idx++], BPF_RET + BPF_K, 0, 0, action);
+		}
+
+		blobmsg_for_each_attr(curn, tbn[OCI_LINUX_SECCOMP_SYSCALLS_ARGS], remn) {
+			blobmsg_parse(oci_linux_seccomp_syscalls_args_policy, __OCI_LINUX_SECCOMP_SYSCALLS_ARGS_MAX, tba, blobmsg_data(curn), blobmsg_len(curn));
+			/* ToDo: process args */
+		}
+	}
+
+	set_filter(&filter[idx], BPF_RET + BPF_K, 0, 0, default_policy);
+
+	prog->len = (unsigned short)(idx + 1);
+	prog->filter = filter;
+
+	return prog;
+
+errout1:
+	/* prog->filter has not been assigned yet, release the array itself */
+	free(filter);
+errout2:
+	free(prog);
+	return NULL;
+}
+
+
+/*
+ * Install a seccomp filter program for the calling thread.
+ *
+ * Sets PR_SET_NO_NEW_PRIVS, then loads @prog with
+ * PR_SET_SECCOMP/SECCOMP_MODE_FILTER.
+ *
+ * Takes ownership of @prog: the program and its filter array are released
+ * on every path, the caller must not touch them afterwards.
+ *
+ * Returns 0 on success, EINVAL if @prog is NULL, otherwise the errno value
+ * of the failing prctl() call.
+ */
+int applyOCIlinuxseccomp(struct sock_fprog *prog)
+{
+	int err = 0;
+
+	if (!prog)
+		return EINVAL;
+
+	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		/* save errno first: logging and free() may overwrite it */
+		err = errno;
+		ERROR("prctl(PR_SET_NO_NEW_PRIVS) failed: %m\n");
+		goto out;
+	}
+
+	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog)) {
+		err = errno;
+		ERROR("prctl(PR_SET_SECCOMP) failed: %m\n");
+		goto out;
+	}
+
+out:
+	/* the kernel keeps its own copy of an installed filter */
+	free(prog->filter);
+	free(prog);
+	return err;
+}
--- /dev/null
+/*
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef _JAIL_SECCOMP_OCI_H_
+#define _JAIL_SECCOMP_OCI_H_
+
+#include <linux/filter.h>
+
+struct blob_attr;
+
+#ifdef SECCOMP_SUPPORT
+/*
+ * Build a seccomp filter program from an OCI seccomp configuration object.
+ * Returns NULL on failure.
+ */
+struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg);
+
+/*
+ * Install a program obtained from parseOCIlinuxseccomp(); the program
+ * struct is released by the callee. Returns 0 on success, an errno value
+ * otherwise.
+ */
+int applyOCIlinuxseccomp(struct sock_fprog *prog);
+#else
+/*
+ * Stubs for builds without seccomp support. They are static inline so
+ * that including this header from more than one source file does not
+ * produce duplicate symbol definitions at link time.
+ */
+static inline struct sock_fprog *parseOCIlinuxseccomp(struct blob_attr *msg) {
+	return NULL;
+}
+
+static inline int applyOCIlinuxseccomp(struct sock_fprog *prog) {
+	return ENOTSUP;
+}
+#endif
+
+#endif
--- /dev/null
+/*
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 2.1
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef _JAIL_SECCOMP_HELPERS_H_
+#define _JAIL_SECCOMP_HELPERS_H_
+
+/*
+ * Look up a syscall number by name.
+ *
+ * Walks the table indices 0..SYSCALL_COUNT-1, converts each index to a
+ * syscall number and compares that number's name with @name.
+ * Returns the syscall number of the first match, or -1 if there is none.
+ */
+static int find_syscall(const char *name)
+{
+	int slot = 0;
+
+	while (slot < SYSCALL_COUNT) {
+		int nr = syscall_index_to_number(slot);
+
+		slot++;
+
+		/* table entries without a name are skipped */
+		if (!syscall_name(nr))
+			continue;
+
+		if (strcmp(syscall_name(nr), name) == 0)
+			return nr;
+	}
+
+	return -1;
+}
+
+/*
+ * Fill in one BPF instruction: opcode @code, jump offsets @jt (condition
+ * true) and @jf (condition false), and the generic operand @k.
+ */
+static void set_filter(struct sock_filter *filter, __u16 code, __u8 jt, __u8 jf, __u32 k)
+{
+	*filter = (struct sock_filter) {
+		.code = code,
+		.jt = jt,
+		.jf = jf,
+		.k = k,
+	};
+}
+
+#endif
#include "seccomp-bpf.h"
#include "seccomp.h"
#include "../syscall-names.h"
-
-static int find_syscall(const char *name)
-{
- int i;
-
- for (i = 0; i < SYSCALL_COUNT; i++) {
- int sc = syscall_index_to_number(i);
- if (syscall_name(sc) && !strcmp(syscall_name(sc), name))
- return sc;
- }
-
- return -1;
-}
-
-static void set_filter(struct sock_filter *filter, __u16 code, __u8 jt, __u8 jf, __u32 k)
-{
- filter->code = code;
- filter->jt = jt;
- filter->jf = jf;
- filter->k = k;
-}
+#include "seccomp-syscalls-helpers.h"
int install_syscall_filter(const char *argv, const char *file)
{
INSTANCE_ATTR_EXTROOT,
INSTANCE_ATTR_OVERLAYDIR,
INSTANCE_ATTR_TMPOVERLAYSIZE,
+ INSTANCE_ATTR_BUNDLE,
__INSTANCE_ATTR_MAX
};
[INSTANCE_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_OVERLAYDIR] = { "overlaydir", BLOBMSG_TYPE_STRING },
[INSTANCE_ATTR_TMPOVERLAYSIZE] = { "tmpoverlaysize", BLOBMSG_TYPE_STRING },
+ [INSTANCE_ATTR_BUNDLE] = { "bundle", BLOBMSG_TYPE_STRING },
};
enum {
argv[argc++] = in->tmpoverlaysize;
}
+ if (in->bundle) {
+ argv[argc++] = "-J";
+ argv[argc++] = in->bundle;
+ }
+
if (in->require_jail)
argv[argc++] = "-E";
return;
}
- if (!in->command) {
+ if (!in->bundle && !in->command) {
LOG("Not starting instance %s::%s, command not set\n", in->srv->name, in->name);
return;
}
return true;
if (in->respawn_timeout != in_new->respawn_timeout)
return true;
-
+ if (in->bundle && in_new->bundle && strcmp(in->bundle, in_new->bundle))
+ return true;
if ((!in->seccomp && in_new->seccomp) ||
(in->seccomp && !in_new->seccomp) ||
(in->seccomp && in_new->seccomp && strcmp(in->seccomp, in_new->seccomp)))
if (in->no_new_privs)
jail->argc++;
+ if (in->bundle)
+ jail->argc += 2;
+
return true;
}
blobmsg_parse(instance_attr, __INSTANCE_ATTR_MAX, tb,
blobmsg_data(in->config), blobmsg_data_len(in->config));
- if (!instance_config_parse_command(in, tb))
- return false;
+ if (!tb[INSTANCE_ATTR_BUNDLE] && !instance_config_parse_command(in, tb))
+ return false;
if (tb[INSTANCE_ATTR_TERMTIMEOUT])
in->term_timeout = blobmsg_get_u32(tb[INSTANCE_ATTR_TERMTIMEOUT]);
if (tb[INSTANCE_ATTR_TMPOVERLAYSIZE])
in->tmpoverlaysize = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_TMPOVERLAYSIZE]));
+ if (tb[INSTANCE_ATTR_BUNDLE])
+ in->bundle = strdup(blobmsg_get_string(tb[INSTANCE_ATTR_BUNDLE]));
+
if (tb[INSTANCE_ATTR_PIDFILE]) {
char *pidfile = blobmsg_get_string(tb[INSTANCE_ATTR_PIDFILE]);
if (pidfile)
free(in->extroot);
free(in->overlaydir);
free(in->tmpoverlaysize);
+ free(in->bundle);
free(in->jail.name);
free(in->jail.hostname);
free(in->seccomp);
blobmsg_add_u32(b, "pid", in->proc.pid);
if (in->command)
blobmsg_add_blob(b, in->command);
+ if (in->bundle)
+ blobmsg_add_string(b, "bundle", in->bundle);
blobmsg_add_u32(b, "term_timeout", in->term_timeout);
if (!in->proc.pending)
blobmsg_add_u32(b, "exit_code", in->exit_code);
void *r = blobmsg_open_table(b, "jail");
if (in->jail.name)
blobmsg_add_string(b, "name", in->jail.name);
- if (in->jail.hostname)
- blobmsg_add_string(b, "hostname", in->jail.hostname);
-
- blobmsg_add_u8(b, "procfs", in->jail.procfs);
- blobmsg_add_u8(b, "sysfs", in->jail.sysfs);
- blobmsg_add_u8(b, "ubus", in->jail.ubus);
- blobmsg_add_u8(b, "log", in->jail.log);
- blobmsg_add_u8(b, "ronly", in->jail.ronly);
- blobmsg_add_u8(b, "netns", in->jail.netns);
- blobmsg_add_u8(b, "userns", in->jail.userns);
- blobmsg_add_u8(b, "cgroupsns", in->jail.cgroupsns);
+ if (!in->bundle) {
+ if (in->jail.hostname)
+ blobmsg_add_string(b, "hostname", in->jail.hostname);
+
+ blobmsg_add_u8(b, "procfs", in->jail.procfs);
+ blobmsg_add_u8(b, "sysfs", in->jail.sysfs);
+ blobmsg_add_u8(b, "ubus", in->jail.ubus);
+ blobmsg_add_u8(b, "log", in->jail.log);
+ blobmsg_add_u8(b, "ronly", in->jail.ronly);
+ blobmsg_add_u8(b, "netns", in->jail.netns);
+ blobmsg_add_u8(b, "userns", in->jail.userns);
+ blobmsg_add_u8(b, "cgroupsns", in->jail.cgroupsns);
+ }
blobmsg_add_u8(b, "console", (in->console.fd.fd > -1));
blobmsg_close_table(b, r);
if (!avl_is_empty(&in->jail.mount.avl)) {
char *extroot;
char *overlaydir;
char *tmpoverlaysize;
+ char *bundle;
int syslog_facility;
int exit_code;